Iris データセットでのパーセプトロンモデルのトレーニング

パーセプトロンの計算式

https://gyazo.com/84bb6e0f26b647c0808b236d23bd5df5 https://gyazo.com/1dc502e7238ef57d322820b8f2b24066

パーセプトロンの実装

code: Python

import numpy as np

class Perceptron(object):

"""パーセプトロンの分類器

パラメータ

---------------

eta: float

学習率（0.0より大きく1.0以下の値）

n_iter: int

トレーニングデータのトレーニング回数

random_state: int

重みを初期化するための乱数シード

属性

---------------

w_: 1次元配列

適合後の重み

errors_: リスト

各エポックでの誤分類（更新）の数

"""

def __init__(self, eta=0.01, n_iter=50, random_state=1):

self.eta = eta

self.n_iter = n_iter

self.random_state = random_state

def fit(self, X, y):

"""トレーニングデータに適合させる

パラメータ

---------------

X: {配列のようなデータ構造}, shape = n_samples, n_features

トレーニングデータ

n_samplesはサンプルの個数、n_featuresは特徴量の個数

y: 配列のようなデータ構造, shape = n_samples

目的変数

戻り値

---------------

self: object

"""

# 標準偏差0.01の正規分布から抽出された乱数で初期値を設定

rgen = np.random.RandomState(self.random_state)

self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape1)

self.errors_ = []

for _ in range(self.n_iter): # トレーニング回数分トレーニングデータを反復

errors = 0

for xi, target in zip(X, y): # 各サンプルで重みを更新

# 重み w_1, ..., w_m の更新

update = self.eta * (target - self.predict(xi))

self.w_0 += update

self.w_1: += update * xi

# 重みの更新が0でない場合は誤分類としてカウント

errors += int(update != 0.0)

# 反復回数ごとの誤差を格納

self.errors_.append(errors)

return self

def net_input(self, X):

"""総入力（w^T * x）を計算"""

return np.dot(X, self.w_1:) + self.w_0

def predict(self, X):

"""1ステップ後のクラスラベルを返す"""

return np.where(self.net_input(X) >= 0.0, 1, -1)

Iris データセットでのパーセプトロンモデルのトレーニング

code: Python

import pandas as pd

df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)

df.tail()

---------------------------------------------------------------------------------------

0 1 2 3 4

145 6.7 3.0 5.2 2.3 Iris-virginica

146 6.3 2.5 5.0 1.9 Iris-virginica

147 6.5 3.0 5.2 2.0 Iris-virginica

148 6.2 3.4 5.4 2.3 Iris-virginica

149 5.9 3.0 5.1 1.8 Iris-virginica

---------------------------------------------------------------------------------------

code: Python

import matplotlib.pyplot as plt

import numpy as np

# 1-100行目の目的変数の抽出（今回は2値分類問題にしたいため、setosaとversicolorの２つの花だけを使用）

y = df.iloc0:100, 4.values

# Iris-setosaを-1、Iris-versicolorを1に変換

y = np.where(y == 'Iris-setosa', -1, 1)

# 1-100行目の1、3列目の抽出

X = df.iloc[0:100, 0, 2].values

# 品種setosaのプロット（赤の〇）

plt.scatter(X:50, 0, X:50, 1, color='red', marker='o', label='seota')

# 品種versicolorのプロット（青の✕）

plt.scatter(X50:100, 0, X50:100, 1, color='blue', marker='x', label='versicolor')

# 軸のラベルの設定

plt.xlabel('sepal length cm')

plt.ylabel('petal length cm')

# 凡例の設定（左上に配置）

plt.legend(loc='upper left')

# 図の表示

plt.show()

https://gyazo.com/7a8980a6b382c2f33860a7b2aa40caff

上図から、このデータセットは線形分離可能なことがわかります。

code: Python

# パーセプトロンのオブジェクトの生成（インスタンス化）

ppn = Perceptron(eta=0.1, n_iter=10)

# トレーニングデータへのモデルの適合

ppn.fit(X, y)

# エポックと誤分類誤差の関係の折れ線グラフをプロット

plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')

# 軸のラベルの設定

plt.xlabel('Epochs')

plt.ylabel('Number of update')

# 図の表示

plt.show()

https://gyazo.com/d74eea0c30c47c9f6012314f5812e002

上図から、6回目のエポックの後、パーセプトロンはすでに収束していて、トレーニングサンプルを完璧に分類できるようになっていることがわかります。

code: Python

from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, resolution=0.02):

# マーカーとカラーマップの準備

markers = ('s', 'x', 'o', '^', 'v')

colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')

cmap = ListedColormap(colors:len(np.unique(y)))

# 決定領域のプロット

x1_min, x1_max = X:, 0.min() - 1, X:, 0.max() + 1

x2_min, x2_max = X:, 1.min() - 1, X:, 1.max() + 1

# グリッドポイントの生成

xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), np.arange(x2_min, x2_max, resolution))

# 各特徴量を1次元配列に変換して予測を実行

Z = classifier.predict(np.array(xx1.ravel(), xx2.ravel()).T)

# 予測結果を元のグリッドポイントのデータサイズに変換

Z = Z.reshape(xx1.shape)

# グリッドポイントの等高線のプロット

plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)

# 軸の範囲の設定

plt.xlim(xx1.min(), xx1.max())

plt.ylim(xx2.min(), xx2.max())

# クラスごとにサンプルをプロット

for idx, cl in enumerate(np.unique(y)):

plt.scatter(x=Xy == cl, 0,

y=Xy == cl, 1,

alpha=0.8,

c=colorsidx,

marker=markersidx,

label=cl,

edgecolor='black')

# 決定領域のプロット

plot_decision_regions(X, y, classifier=ppn)

# 軸のラベルの設定

plt.xlabel('sepal length cm')

plt.ylabel('petal length cm')

# 凡例の設定（左上に配置）

plt.legend(loc='upper left')

# 図の表示

plt.show()

https://gyazo.com/281212f3f93f57ece3efe71f661efd2c